This is the work on machine failure prediction. We predict the state of the machine one hour ahead, based on what we learn from the preceding data set.
#################################### Preparing the data set ################################################
# Installation of packages and loading them ###########################
#### Installation of necessary packages, need to do it only one time
#install.packages("corrplot")
# install.packages("caret")
#install.packages("randomForest")
# install.packages("MASS")
# install.packages("rpart")
# install.packages("e1071")
#install.packages("glmnet")
#install.packages("plotly")
#install.packages("missMDA")
#install.packages("pROC")
#install.packages("DMwR")
#install.packages("gbm")
# install.packages("rattle")
# install.packages("rpart.plot")
# install.packages("RColorBrewer")
# install.packages("party")
# install.packages("partykit")
#### Loading the necessary packages
library(pROC) #for the roc curve methods
library("MASS")
library("rpart")
library("randomForest")
library("e1071")
library("glmnet")
library(plotly)
library(ggplot2)
library(missMDA)
library(caret)
library(DMwR)
library(rpart)
library(rattle)
library(rpart.plot)
library(RColorBrewer)
library(party)
library(partykit)
library(caret)
Then we set the working directory and load the data set, resampled at one-hour intervals.
# Point the session at the folder that holds the CSV exports.
setwd("/home/moustapha/Energiency Big Data Project/Archive")

# Read the comma-separated file; its first row supplies the column names.
data <- read.table("all1h1.csv", sep = ",", header = TRUE)
# Alternative input file (disabled):
# data <- read.table("all1h2.csv", sep = ",", header = TRUE)

# Parse the `X` column as date-times ("YYYY-MM-DD HH:MM:SS").
# NOTE(review): no `tz` is given, so the session's local time zone is used.
DateTS <- as.POSIXlt(data$X, format = "%Y-%m-%d %H:%M:%S")

# Replace the raw `X` values with the parsed date-times.
data$X <- DateTS

# Rename the first column to "date" (assumes `X` is column 1 -- TODO confirm).
names(data)[1] <- "date"

# Use the timestamps as row names.
rownames(data) <- data$date
## date prodh elec
## Min. :2012-12-31 23:00:00 Min. : 0.00 Min. : 0.00
## 1st Qu.:2013-10-20 21:45:00 1st Qu.:28.04 1st Qu.:13.61
## Median :2014-08-09 20:30:00 Median :32.97 Median :14.23
## Mean :2014-08-09 20:57:44 Mean :27.11 Mean :12.20
## 3rd Qu.:2015-05-29 19:15:00 3rd Qu.:35.03 3rd Qu.:14.53
## Max. :2016-03-17 18:00:00 Max. :38.49 Max. :15.69
## NA's :1594 NA's :1284
## gram planstop prod
## Min. : 0 Min. :0.000 Min. : 0.00
## 1st Qu.: 42 1st Qu.:1.000 1st Qu.:27.89
## Median : 45 Median :1.000 Median :32.48
## Mean : 43640 Mean :0.802 Mean :26.54
## 3rd Qu.: 45 3rd Qu.:1.000 3rd Qu.:34.61
## Max. :420010 Max. :1.000 Max. :38.22
## NA's :21747 NA's :17623 NA's :22957